## Table 3

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(estimatr) #estimation
library(texreg) #generate latex table 

#load the main dataset and add the transformed independent variable
#logcult is log(maxcult + 1); no binary cultivation variable is created here
pvp_data <- read.csv("PvP_data/PvP_data_main.csv") %>%
  mutate(logcult = log(maxcult + 1))

#subset data to municipalities with prior FARC presence (farc_presence == 1)
pvp_farc_subset <- filter(pvp_data, farc_presence == 1)

#iv models

#prep weather and soil variables for iv
pvp_farc_subset <- pvp_farc_subset %>%
  mutate(
    #factor flag for DRAIN code "M" (moderate drainage): levels 0 / 1
    draindummy = factor(as.integer(DRAIN == "M")),
    #squared distance from the optimal humidity point (85);
    #rows with missing humid_mean are coded 0
    humiddist = case_when(
      is.na(humid_mean) ~ 0,
      TRUE ~ (humid_mean - 85)^2
    ),
    #squared distance from the optimal temperature band (14 to 27, wide band);
    #the upper bound is tested first and rows matching neither test get 0
    temprangedist = case_when(
      maxtemp > 27 ~ (maxtemp - 27)^2,
      mintemp < 14 ~ (mintemp - 14)^2,
      TRUE ~ 0
    )
  )

#instrument for continuous independent variable
#first stage: regress logcult on the weather and soil variables and store the
#fitted values as ols_inst.
#na.exclude pads the predictions with NA for rows dropped because of missing
#values, so the result always has one entry per row of pvp_farc_subset; with
#the default na.omit the shorter vector would make this assignment fail.
pvp_farc_subset$ols_inst <- predict(
  lm(
    logcult ~ PHAQ + temprangedist + TOTN + TOTC + sun_mean + humiddist +
      raintot_me + draindummy,
    data = pvp_farc_subset,
    na.action = na.exclude
  )
)

#scale variables
#each new column is the centred and scaled version of its source column,
#stored as a plain numeric vector (scale() attributes dropped by as.numeric)
pvp_farc_subset$elev_stdev_scaled <- as.numeric(scale(pvp_farc_subset$elev_stdev))
pvp_farc_subset$coca_log_scaled <- as.numeric(scale(pvp_farc_subset$logcult))
pvp_farc_subset$base_dist_scaled <- as.numeric(scale(pvp_farc_subset$milcentdist))
pvp_farc_subset$farcprop18_scale <- as.numeric(scale(pvp_farc_subset$farcprop18))


#estimate ols and iv models
#one OLS/IV pair per control variable; in every IV model ols_inst is the
#instrument for coca_log_scaled and the control instruments itself

#control: scaled military base distance
ols_mil_control <- lm_robust(
  formula = dissident_presence ~ coca_log_scaled + base_dist_scaled,
  data = pvp_farc_subset
)
iv_mil_control <- iv_robust(
  formula = dissident_presence ~ coca_log_scaled + base_dist_scaled |
    ols_inst + base_dist_scaled,
  data = pvp_farc_subset
)

#control: scaled elevation standard deviation
ols_terr_control <- lm_robust(
  formula = dissident_presence ~ coca_log_scaled + elev_stdev_scaled,
  data = pvp_farc_subset
)
iv_terr_control <- iv_robust(
  formula = dissident_presence ~ coca_log_scaled + elev_stdev_scaled |
    ols_inst + elev_stdev_scaled,
  data = pvp_farc_subset
)

#control: scaled farcprop18
ols_pol_control <- lm_robust(
  formula = dissident_presence ~ coca_log_scaled + farcprop18_scale,
  data = pvp_farc_subset
)
iv_pol_control <- iv_robust(
  formula = dissident_presence ~ coca_log_scaled + farcprop18_scale |
    ols_inst + farcprop18_scale,
  data = pvp_farc_subset
)


#table for results
#prints the latex table for the six models, in OLS/IV pairs by control variable
texreg(
  list(
    ols_mil_control, iv_mil_control,
    ols_terr_control, iv_terr_control,
    ols_pol_control, iv_pol_control
  ),
  #NA leaves the first coefficient label unchanged
  custom.coef.names = c(
    NA,
    "Coca Cultivation",
    "Military Base Distance",
    "Terrain Ruggedness",
    "FARC Party Support"
  ),
  caption = "Competing Explanations for Splinter Group Emergence",
  caption.above = TRUE,
  float.pos = "h",
  digits = 2,
  stars = c(0.001, 0.01, 0.05),
  single.row = FALSE,
  include.ci = FALSE,
  include.fstatistic = TRUE,
  include.nobs = TRUE,
  include.rmse = FALSE,
  include.rsquared = FALSE,
  include.adjrs = FALSE
)
